---
title: "Data Visualization to End Police Violence"
author: "Tim Fraser, Northeastern University"
subtitle: Replication Code no. 1
output:
  pdf_document: default
  html_notebook: default
---


This code document describes how to replicate the figure "Fatal Shootings of People of Color by Police", posted on Facebook on June 10.

# 0. Load Packages

```{r}
# Load packages
library(tidyverse)
library(wesanderson)
```


# 1. Bar Chart


```{r}
# Pull the Washington Post's database of fatal police shootings directly
# from the newspaper's GitHub repository.
# Background on the dataset:
# https://github.com/washingtonpost/data-police-shootings
# NOTE(review): confirm this raw-file URL still resolves before re-running;
# the repository layout may have changed since this notebook was written.
dat <- read_csv(
  file = "https://raw.githubusercontent.com/washingtonpost/data-police-shootings/master/fatal-police-shootings-data.csv"
)

# Preview the first few rows
head(dat)
```


```{r}
# Exploratory table: the average number of fatal police shootings per year,
# over the years before 2020, for each racial/ethnic group in the data.
dat %>%
  # Extract the year as an integer, so the filter below is a true numeric
  # comparison instead of relying on character-vs-number coercion
  mutate(year = as.integer(str_sub(as.character(date), 1, 4))) %>%
  # Keep only the full years (2015 to 2019 per the original analysis)
  filter(year < 2020) %>%
  # Recode race into readable categories (records with missing race stay NA)
  mutate(race = dplyr::recode(
    race,
    "W" = "White, non-Hispanic",
    "B" = "Black, non-Hispanic",
    "A" = "Asian",
    "N" = "Native American",
    "H" = "Hispanic",
    "O" = "Other")) %>%
  # Tally the total number of police shootings per year and racial/ethnic group
  count(year, race) %>%
  # A group with no recorded shootings in some year has no row for that year;
  # fill those combinations with zero so the average covers every year
  complete(year, race, fill = list(n = 0L)) %>%
  # Now take the average across years for each group
  group_by(race) %>%
  summarize(shootings = mean(n, na.rm = TRUE))
```





```{r}
# Share of fatal police shootings (years before 2020) by three race
# categories. Result columns: race, shootings (count), total (count across
# all categories), percent (shootings / total, as a proportion).
shootings <- dat %>%
  # Extract the year as an integer, so the filter below is a true numeric
  # comparison instead of relying on character-vs-number coercion
  mutate(year = as.integer(str_sub(as.character(date), 1, 4))) %>%
  # Keep only the full years (2015 to 2019 per the original analysis)
  filter(year < 2020) %>%
  # Collapse race to White, Black, and everything else.
  # NOTE(review): records with a missing race code are not "W" or "B", so
  # they are also counted as "Other" here -- confirm this is intended.
  mutate(race = if_else(race %in% c("W", "B"), race, "Other") %>%
           dplyr::recode(
             "W" = "White, non-Hispanic",
             "B" = "Black, non-Hispanic")) %>%
  # Total number of police shootings per group across all retained years
  # (a single tally; equivalent to counting per year and then summing)
  count(race, name = "shootings") %>%
  # Calculate each group's share of all shootings over the period
  mutate(total = sum(shootings, na.rm = TRUE),
         percent = shootings / total)

```



Now, let's take the population distribution for 2015-2019, based on the Census Bureau's five-year American Community Survey.

```{r}
# Population shares by race, hand-entered from the Census Bureau's
# QuickFacts table (most recent data, 2019):
# https://www.census.gov/quickfacts/fact/table/US/PST045219
# NOTE(review): 76.5% looks like it may be the "White alone" share (which
# includes Hispanic residents) rather than a non-Hispanic figure -- confirm
# against the table. "Other" is presumably the remainder (1 - .765 - .134).
census <- data.frame(
  race    = c("White, non-Hispanic", "Black, non-Hispanic", "Other"),
  percent = c(.765, .134, .101),
  total   = 328239523
)

# Estimated head count per group: total population times the group's share
census$pop <- census$total * census$percent

```






```{r}
# Actual percentage of police shootings across 5-years
shootings
# What would match the distribution of the population in 2019
census

# Build the plotting data: one row per race category for each of three
# scenarios ("Actual", "Fair", "Just"), plus the text shown in the figure.
viz <- bind_rows(
  # Scenario 1: observed shares of fatal shootings
  shootings %>%
    select(race, percent) %>%
    mutate(type = "Actual"),

  # Scenario 2: shares equal to each group's share of the population
  census %>%
    select(race, percent) %>%
    mutate(type = "Fair"),

  # Scenario 3: no shootings at all.
  # Instead of zero, making it very small so that the color is still visible
  data.frame(
    race = c("Black, non-Hispanic", "White, non-Hispanic", "Other"),
    percent = c(0.02, 0.02, 0.02),
    type = "Just")
) %>%
  # Shorter display names; recode_factor also fixes the factor level order
  mutate(race = dplyr::recode_factor(
    race,
    "Black, non-Hispanic" = "Black",
    "Other" = "Other People of Color",
    "White, non-Hispanic" = "White")) %>%
  # Only the Black segment gets a text label. The "Just" scenario is labelled
  # 0% based on its scenario type (its 0.02 is only a drawing placeholder),
  # rather than by testing the floating-point value itself.
  mutate(label = case_when(
    race != "Black" ~ NA_character_,
    type == "Just" ~ "0%",
    TRUE ~ paste0(round(percent * 100), "%"))) %>%
  # X-axis text under each bar
  mutate(description = dplyr::recode(
    type,
    "Actual" = "Actual Share of\nFatal Shootings by Police",
    "Fair" = "If Shootings were\nproportional to the \nUS Population",
    "Just" = "If there were Zero\nShootings by Police")) %>%
  # Vertical offset (in percentage points) of the text label from the
  # segment's value: below it for the tall bars, above it for the sliver
  mutate(position = dplyr::recode(
    type,
    "Actual" = -5,
    "Fair" = -5,
    "Just" = 6)) %>%
  # Facet header for each scenario. The share quoted in the first header is
  # taken from the computed label, so it always agrees with the bar label
  # even if the downloaded data changes.
  mutate(overhead = dplyr::recode(
    type,
    "Actual" = paste0("Black Americans \n represent ",
                      label[type == "Actual" & race == "Black"],
                      " of \n fatal shootings by police..."),
    "Fair" = "Double their share \n of the \n US population...",
    "Just" = "Justice means \n no more \n shootings by police."))

```



```{r, message = FALSE, warning = FALSE}
# Stacked bar chart: one bar per scenario (each in its own facet), segments
# filled by race, with a text label on the Black segment.
viz %>%
  ggplot(mapping = aes(y = percent * 100, x = description,
                       fill = reorder(race, -percent),
                       label = label)) +
  # NOTE(review): on ggplot2 >= 3.4.0 `linewidth` is the preferred argument
  # for the border width; `size` is kept for older versions.
  geom_col(position = "stack", color = "white", size = 1.5) +
  # Place each label at its segment value plus the per-row offset. Mapping
  # the offset inside aes() ties it to the piped data, instead of reaching
  # back to the global `viz` object and relying on matching row order.
  geom_text(mapping = aes(y = percent * 100 + position)) +
  # One panel per scenario, each showing only its own bar
  facet_wrap(~overhead, scales = "free_x") +
  scale_fill_manual(values = c("grey", "#46ACC8", "#B40F20")) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_blank(),
        panel.background = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "grey", color = NA)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(x = "", y = "% of Fatal Shootings by Police",
       fill = "Americans by Race",
       title = "Fatal Shootings of People of Color by Police",
       caption = "Sources: Washington Post 2015-2019, American Community Survey 2015-2019.\nLearn more at www.joincampaignzero.org")
```


```{r}
# Drop the intermediate objects created for this figure from the workspace
rm(list = c("viz", "shootings", "census"))

```



